####THIS CHUNK WILL NEED TO BE ADJUSTED TO READ FROM GITHUB DATA###
#THIS CHUNK MAKES MORE SENSE TO MOVE AFTER FUNCTIONS ARE GENERATED
#IT IS LOCATED HERE FOR VISIBILITY ONLY, ONCE MOVED TO GITHUB, RELOCATE TO A MORE LOGICAL
#LOCATION IN THE CODE AS DESIRED
#print the working directory as a sanity check before the relative reads below
getwd()
## [1] "C:/Users/jeffb/Desktop/Life/personal-projects/COVID"
#read in full DSHS data with county and TSA information
#(paths are relative to the project root printed above)
full.dshs<-read.csv("combined-datasets/county.csv")#change path as needed
tsa.dshs<-read.csv("combined-datasets/tsa.csv")#change path as needed
#rt.data.clean function will clean data, dropping unwanted variables
#rt.data.clean: drop unwanted columns and coerce types ahead of Rt estimation.
#covid.data is either the county-level (full.dshs) or TSA-level (tsa.dshs) frame.
rt.data.clean<-function(covid.data)
{
library(dplyr)
library(tidyr)
library(reshape2)
#print structure for a quick visual check of the incoming data
str(covid.data)
#columns retained when present; either input may lack some of these
keep.cols<-c("County", #Texas county name
"Date", #date of data recorded
"TSA_Name", #Trauma Service Area the county belongs to
"Cases_Daily", #daily new case count
"Population_DSHS", #county population per DSHS
"Population_Census") #county population per Census estimates
#Note: Census figures are projections from the 2010 Census
#as provided by the Census Bureau
covid.data<-covid.data[,names(covid.data)%in%keep.cols]
#coerce the Date strings to Date class
covid.data$Date<-as.Date(covid.data$Date)
#ensure daily case counts are numeric
covid.data$Cases_Daily<-as.numeric(covid.data$Cases_Daily)
return(covid.data)
}
#separate county, TSA and State data into separate Data frames
#rt.data.organize: split cleaned inputs into county-, TSA- and state-level frames.
#mycounty: raw county-level DSHS data; mytsa: raw TSA-level DSHS data.
#Returns list(county=, TSA=, state=).
rt.data.organize<-function(mycounty, mytsa){
library(dplyr)
library(tidyr)
#clean each input exactly once
cleaned.county<-rt.data.clean(mycounty)
cleaned.tsa<-rt.data.clean(mytsa)
#county level: the TSA assignment is not needed
county.df<-cleaned.county%>%dplyr::select(-TSA_Name)
#TSA level: reuse the already-cleaned TSA data
#(fix: previously called rt.data.clean(mytsa) a second time, duplicating work)
TSA.df<-cleaned.tsa
#state level: aggregate daily cases and populations across all TSAs per date
state.temp<-cleaned.tsa%>%
group_by(Date)%>%
mutate(state_cases_daily=sum(Cases_Daily, na.rm=TRUE))%>%#state daily cases
mutate(state_pop_DSHS=sum(Population_DSHS, na.rm=TRUE))%>%#state DSHS population
mutate(state_pop_Census=sum(Population_Census, na.rm=TRUE))%>%#state Census population
dplyr::select(Date,state_cases_daily, state_pop_DSHS, state_pop_Census)%>%#select only variables of interest
distinct()#one row per date once TSA detail is dropped
state.df<-data.frame(Date=state.temp$Date,
Cases_Daily=state.temp$state_cases_daily,
Population_DSHS=state.temp$state_pop_DSHS,
Population_Census=state.temp$state_pop_Census)
#return a list of the three dataframes generated
return(list(county=county.df, TSA=TSA.df, state=state.df))
}
#rt.df.extraction: reshape the output of R0::estimate.R (TD method) into a
#data frame with columns Date, Rt, lower and upper (95% CI bounds).
#(fix: removed a stray ')' that followed the closing brace and broke parsing)
rt.df.extraction<-function(Rt.estimate.output){
#stack() turns the named Rt vector into (values, ind);
#reorder and rename those columns to (Date, Rt)
rt.df<-setNames(stack(Rt.estimate.output$estimates$TD$R)[2:1], c('Date', 'Rt'))
#extract the upper and lower limits of the Rt 95% confidence intervals
CI.lower.list<-Rt.estimate.output$estimates$TD$conf.int$lower
CI.upper.list<-Rt.estimate.output$estimates$TD$conf.int$upper
#flatten to plain vectors (no-op when conf.int columns are already numeric)
CI.lower <- unlist(CI.lower.list, recursive = TRUE, use.names = TRUE)
CI.upper <- unlist(CI.upper.list, recursive = TRUE, use.names = TRUE)
#attach the bounds alongside the point estimates
rt.df$lower<-CI.lower
rt.df$upper<-CI.upper
return(rt.df)
}
#Function to generate Rt estimates, need to read in data frame and population size
#covid.rt: estimate time-dependent Rt (R0::estimate.R, TD method) for one
#geography, once with the DSHS population and once with the Census population.
#mydata must contain Date, Cases_Daily, Population_DSHS, Population_Census.
#Returns list(rt.DSHS=, rt.Census=) of data frames from rt.df.extraction().
covid.rt<-function(mydata){
library(R0)
#treat missing daily counts as zero cases
mydata<-mydata%>%replace(is.na(.),0)
#named numeric vector of daily cases (names = dates),
#the input shape estimate.R expects
mydata.new<-pull(mydata, Cases_Daily)
date.vector<-pull(mydata, Date)
names(mydata.new)<-c(date.vector)
#create the generation time distribution
##### NOTE: THIS MAY NEED MODIFICATION ONCE MORE INFORMATION IS AVAILABLE ####
gen.time<-generation.time("lognormal", c(4.0, 2.9))#verify these values and distribution/update as needed
#gen.time<-generation.time("gamma", c(3,1.5))
#population sizes (assumes a single geography, so row 1 suffices)
pop.DSHS<-mydata$Population_DSHS[1]
pop.census<-mydata$Population_Census[1]
#restrict the series to run from 2020-03-09 to the latest date
min.date<-min(mydata$Date)
max.date<-max(mydata$Date)
#NOTE(review): assumes 2020-03-09 appears exactly once; which() returns
#integer(0) if it is absent - confirm upstream data always includes it
i<-which(mydata$Date=="2020-03-09")
total.days<-difftime(max.date, min.date, units="days")
total.days<-as.integer(total.days)
#NOTE(review): i:total.days presumes one row per day starting at min.date;
#for a gap-free series the vector has total.days+1 entries, so verify the
#final day is intentionally excluded
mydata.newest<-mydata.new[i:total.days]
#TD estimate using the DSHS population
rt.DSHS<-estimate.R(mydata.newest,
gen.time,
methods=c("TD"),
pop.size=pop.DSHS,
nsim=1000)
#TD estimate using the Census population
#(fix: previously passed undefined 'pop.Census'; the local is 'pop.census',
#so the old code only worked if a global of that name happened to exist)
rt.Census<-estimate.R(mydata.newest,
gen.time,
methods=c("TD"),
pop.size=pop.census,
nsim=1000)
#reshape both estimates into Date/Rt/lower/upper data frames
rt.DSHS.df<-rt.df.extraction(rt.DSHS)
rt.Census.df<-rt.df.extraction(rt.Census)
return(list(rt.DSHS=rt.DSHS.df, rt.Census=rt.Census.df))
}
# TODO
# TODO: fix incid should be positive and non-missing
#apply the covid.rt function to the TSA.daily data by TSA region
# TSA.daily%>%group_by(TSA_Name)%>%covid.rt()
# TSA.daily%>%group_by(TSA_Name)%>%group_map(~covid.rt)
# TSA_complete = na.omit(TSA.daily)
# TSA_RT = nlme::gapply(TSA.daily, FUN = covid.rt, groups = TSA.daily$TSA_Name)
#
# TSA_list = rep(NA, length = length(unique(TSA.daily$TSA_Name)))
# for (TSA in TSA.daily$TSA_Name) {
# out = covid.rt(subset(TSA.daily, TSA_Name == TSA))
# TSA_RT = append(test, out)
# }
#run covid.rt function on state level data
#(state.daily is produced elsewhere from rt.data.organize(); NOTE(review):
#confirm it is defined before this chunk runs)
state.rt.list<-covid.rt(state.daily)
## Warning: package 'R0' was built under R version 3.6.3
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Warning in est.R0.TD(epid = c(`2020-03-09` = 7, `2020-03-10` = 3, `2020-03-11` =
## 3, : Simulations may take several minutes.
## Warning in est.R0.TD(epid = c(`2020-03-09` = 7, `2020-03-10` = 3, `2020-03-11` =
## 3, : Using initial incidence as initial number of cases.
## Warning in est.R0.TD(epid = c(`2020-03-09` = 7, `2020-03-10` = 3, `2020-03-11` =
## 3, : Simulations may take several minutes.
## Warning in est.R0.TD(epid = c(`2020-03-09` = 7, `2020-03-10` = 3, `2020-03-11` =
## 3, : Using initial incidence as initial number of cases.
#extract rt estimates based on DSHS population estimates
state.rt.DSHSpop<-state.rt.list$rt.DSHS
#extract rt estimates based on Census population estimates
state.rt.Censuspop<-state.rt.list$rt.Census
#persist only the DSHS-population estimates; the Census-based frame is kept
#in memory but not written out here
write.csv(state.rt.DSHSpop, file="statistical-output/rt/state_rt.csv", row.names=FALSE)
#ts.data.clean function will clean data, dropping unwanted variables
#note: mydata will be either full.dshs or tsa.dshs data
#ts.data.clean: subset to the time-series columns and coerce types.
#covid.data is either the county-level or TSA-level DSHS data frame.
ts.data.clean<-function(covid.data) {
library(dplyr)
library(tidyr)
library(reshape2)
#print structure for a quick visual check of the incoming data
str(covid.data)
#columns retained when present; either input may lack some of these
wanted<-c("County", #Texas county name
"Date", #date of data recorded
"TSA_Name", #Trauma Service Area the county belongs to
"Metro_Area", #indicator of metro/non-metro county
"Cases_Cumulative", #cumulative case count
"Population_DSHS", #county population per DSHS
"Population_Census") #county population per Census estimates
#Note: Census figures are projections from the 2010 Census
#as provided by the Census Bureau
covid.data<-covid.data[,names(covid.data)%in%wanted]
#coerce the Date strings to Date class
covid.data$Date<-as.Date(covid.data$Date)
#ensure cumulative case counts are numeric
covid.data$Cases_Cumulative<-as.numeric(covid.data$Cases_Cumulative)
#floor negative cumulative counts at zero (NA values remain NA)
negs<-which(covid.data$Cases_Cumulative<0)
covid.data$Cases_Cumulative[negs]<-0
return(covid.data)
}
#separate county, metro, TSA and State data into separate Data frames
#ts.data.organize: build county-, metro-, TSA- and state-level cumulative-case
#data frames (each tagged with Level/Level_Name) from raw county and TSA data.
#Returns list(county=, TSA=, state=, metro=).
ts.data.organize<-function(mycounty, mytsa){
library(dplyr)
library(tidyr)
### COUNTY ###
#call the ts.data.clean function on mydata
cleaned.county<-ts.data.clean(mycounty)
# drop NA dates and rows before the series start (2020-03-04)
cleaned.county<-subset(cleaned.county, !is.na(Date) & Date>=as.Date('2020-03-04'))
#Drop TSA_Name from county data frame
county.df<-cleaned.county%>%dplyr::select(-TSA_Name, -Metro_Area)
county.df$Level = 'County'
#NOTE(review): assumes County is the first column after cleaning - confirm
colnames(county.df)[1] = 'Level_Name'
### METRO ###
#aggregate county rows up to metro areas, one row per (Metro_Area, Date)
metro.temp<-cleaned.county%>%
group_by(Metro_Area,Date)%>%
mutate(metro_Cases_Cumulative=sum(Cases_Cumulative, na.rm=TRUE))%>%#metro cumulative cases
mutate(metro_pop_DSHS=sum(Population_DSHS, na.rm=TRUE))%>%#metro population_DSHS
mutate(metro_pop_Census=sum(Population_Census, na.rm=TRUE))%>%#metro population_Census
dplyr::select(Date,Metro_Area, metro_Cases_Cumulative, metro_pop_DSHS, metro_pop_Census)%>%#select only variables of interest
distinct()#keep only distinct rows now that county detail is dropped
metro.df<-data.frame(Date=metro.temp$Date,
Level = 'metro',
Level_Name=metro.temp$Metro_Area,
Cases_Cumulative=metro.temp$metro_Cases_Cumulative,
Population_DSHS=metro.temp$metro_pop_DSHS,
Population_Census=metro.temp$metro_pop_Census
)
# drop NA dates
metro.df<-subset(metro.df, !is.na(Date) & Date>=as.Date('2020-03-04'))
### TSA ###
TSA.df<-ts.data.clean(mytsa)
TSA.df<-subset(TSA.df, Date>=as.Date('2020-03-04'))
TSA.df$Level = 'TSA'
#NOTE(review): assumes TSA_Name is the second column after cleaning - confirm
colnames(TSA.df)[2] = 'Level_Name'
### STATE ###
#aggregate TSA rows up to the state, one row per Date
state.temp<-TSA.df%>%
group_by(Date)%>%
mutate(state_Cases_Cumulative=sum(Cases_Cumulative, na.rm=TRUE))%>%#state cumulative cases
mutate(state_pop_DSHS=sum(Population_DSHS, na.rm=TRUE))%>%#state population_DSHS
mutate(state_pop_Census=sum(Population_Census, na.rm=TRUE))%>%#state population_Census
dplyr::select(Date,state_Cases_Cumulative, state_pop_DSHS, state_pop_Census)%>%#select only variables of interest
distinct()#keep only distinct rows now that TSA detail is dropped
state.df<-data.frame(Date=state.temp$Date,
Level = 'State',
Level_Name = 'Texas',
Cases_Cumulative=state.temp$state_Cases_Cumulative,
Population_DSHS=state.temp$state_pop_DSHS,
Population_Census=state.temp$state_pop_Census
)
state.df<- subset(state.df, Date>=as.Date('2020-03-04'))
#return a list of the four dataframes generated
return(list(county=county.df, TSA=TSA.df, state=state.df, metro=metro.df))
}
library(forecast)
## Warning: package 'forecast' was built under R version 3.6.3
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggplot2)
library(astsa)
## Warning: package 'astsa' was built under R version 3.6.3
##
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
##
## gas
library(forecast)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ggplot2)
library(fpp2)
## Warning: package 'fpp2' was built under R version 3.6.3
## Loading required package: fma
## Warning: package 'fma' was built under R version 3.6.3
##
## Attaching package: 'fma'
## The following objects are masked from 'package:astsa':
##
## chicken, sales
## The following objects are masked from 'package:MASS':
##
## cement, housing, petrol
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 3.6.3
##
## Attaching package: 'fpp2'
## The following object is masked from 'package:astsa':
##
## oil
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
#covid.arima.forecast: fit an auto.arima model to one geography's cumulative-case
#series and forecast ahead. Series that never reach 5 cumulative cases are too
#sparse to model and get an NA-filled forecast frame of the same shape instead.
#mydata must contain Date, Cases_Cumulative, Level, Level_Name (the last two are
#used only for the diagnostic-plot output path).
#Returns data.frame(Date, Cases.Cumulative, CI.Lower, CI.Upper) covering the
#observed dates plus prediction.period forecast days (95% interval bounds).
covid.arima.forecast<-function(mydata)
{
maxdate<-max(mydata$Date)
mindate <-min(mydata$Date)
prediction.period<-10 #forecast horizon in days; change as desired
#model only series that ever reach 5 cumulative cases
#(fix: the former test max(Cases_Cumulative>=5, na.rm=TRUE) yields -Inf, which
#is truthy, for an all-NA series; any() returns FALSE there instead)
if(isTRUE(any(mydata$Cases_Cumulative>=5, na.rm = TRUE)))
{
#NOTE(review): ts() start/frequency left at defaults; assumes one row per day
my.timeseries<-ts(mydata$Cases_Cumulative)
#get a fit model using the best ARIMA from auto.arima
arima.fit<-forecast::auto.arima(my.timeseries)
#forecast prediction.period days ahead
arima.forecast<-forecast::forecast(arima.fit, h = prediction.period)
#ACF/PACF diagnostics, saved per geography
acf<-ggAcf(my.timeseries, lag.max=30)
pacf<-ggPacf(my.timeseries, lag.max=30)
grid.arrange(acf, pacf, nrow=2)
ggsave(paste0('statistical-output/time-series/diagnostics/', mydata$Level[1],'/', mydata$Level_Name[1], '.png'))
#observed series followed by point forecasts; 95% bounds on forecast rows only
#(fix: horizon now uses prediction.period instead of a hard-coded 10)
arima.out<-data.frame(Date = seq(mindate, maxdate + prediction.period, by = 'days'),
Cases.Cumulative = c(my.timeseries, arima.forecast[['mean']]),
CI.Lower = c(rep(NA, times = length(my.timeseries)), arima.forecast[['lower']][, 2]),
CI.Upper = c(rep(NA, times = length(my.timeseries)), arima.forecast[['upper']][, 2]))
} else {
#sparse series: pad with NA forecasts so the output shape matches the modeled case
arima.out<-data.frame(Date = seq(mindate, maxdate + prediction.period, by = 'days'),
Cases.Cumulative = c(mydata$Cases_Cumulative, rep(NA, times = prediction.period)),
CI.Lower = rep(NA, times = length(mydata$Date) + prediction.period),
CI.Upper = rep(NA, times = length(mydata$Date) + prediction.period)
)
}
return(arima.out)
}
#get county forecasts using covid.arima.forecast function
#load nlme package
library(nlme)
##
## Attaching package: 'nlme'
## The following object is masked from 'package:forecast':
##
## getResponse
## The following object is masked from 'package:dplyr':
##
## collapse
#apply the covid.arima.forecast function to the county.cumulative dataframe by county
#(county.cumulative comes from ts.data.organize(); NOTE(review): confirm it is
#defined before this chunk runs)
#note that the output will be a list of dataframes, one per Level_Name
county.arima.output<-nlme::gapply(county.cumulative, FUN = covid.arima.forecast, groups=county.cumulative$Level_Name)
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
#load data.table package
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:dplyr':
##
## between, first, last
#bind the per-county forecast frames into one table; idcol carries the group name
county.arima.df<-data.table::rbindlist(county.arima.output, idcol=TRUE)
#the first column (the rbindlist id) holds the county name
colnames(county.arima.df)[1] = 'Level_Name'
county.arima.df$Level = 'County'
#get metro forecasts using covid.arima.forecast function
#load nlme package
library(nlme)
#apply the covid.arima.forecast function to the metro.cumulative dataframe by metro
#note that the output will be a list of dataframes, one per metro area
metro.arima.output<-nlme::gapply(metro.cumulative, FUN = covid.arima.forecast, groups=metro.cumulative$Level_Name)
## Saving 7 x 5 in image
## Saving 7 x 5 in image
#load data.table package
library(data.table)
#bind the per-metro forecast frames; the first column is the rbindlist id (metro name)
metro.arima.df<-data.table::rbindlist(metro.arima.output, idcol=TRUE)
colnames(metro.arima.df)[1] = 'Level_Name'
metro.arima.df$Level = 'metro'
#get TSA forecasts using covid.arima.forecast function
#load nlme package
library(nlme)
#apply the covid.arima.forecast function to the TSA.cumulative dataframe by TSA
#note that the output will be a list of dataframes, one per TSA
TSA.arima.output<-nlme::gapply(TSA.cumulative, FUN = covid.arima.forecast, groups=TSA.cumulative$Level_Name)
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
#load data.table package
library(data.table)
#bind the per-TSA forecast frames; the first column is the rbindlist id (TSA name)
TSA.arima.df<-data.table::rbindlist(TSA.arima.output, idcol=TRUE)
colnames(TSA.arima.df)[1] = 'Level_Name'
TSA.arima.df$Level = 'TSA'
#get Texas forecasts using covid.arima.forecast function
#(a single state-level series, so no gapply grouping is needed)
texas.arima.df<-covid.arima.forecast(state.cumulative)
## Saving 7 x 5 in image
texas.arima.df$Level = 'State'
texas.arima.df$Level_Name = 'Texas'
#export arima results into .csv files
write.csv(texas.arima.df, 'statistical-output/time-series/df/state.csv', row.names = F)
write.csv(TSA.arima.df, 'statistical-output/time-series/df/tsa.csv', row.names = F)
write.csv(county.arima.df, 'statistical-output/time-series/df/county.csv', row.names = F)
write.csv(metro.arima.df, 'statistical-output/time-series/df/metro.csv', row.names = F)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following object is masked from 'package:base':
##
## date
#week-over-week case growth ratio per TSA:
#week2 = cases added in the most recent 7 days, week1 = the 7 days before that
cumcases.TSA <- TSA.cumulative %>% dplyr::select(Date,Level_Name, Cases_Cumulative)
cumcases.TSA$Date <- ymd(cumcases.TSA$Date)
latestdate <- max(cumcases.TSA$Date)
earliestdate <- latestdate - 14
middate <- latestdate-7
#NOTE(review): these subtractions assume each date subset lists the same TSAs
#in the same row order - confirm upstream sorting guarantees this
week2 <- subset(cumcases.TSA, Date==latestdate, select=Cases_Cumulative) -
subset(cumcases.TSA, Date==middate, select=Cases_Cumulative)
week1 <- subset(cumcases.TSA, Date==middate, select=Cases_Cumulative) -
subset(cumcases.TSA, Date==earliestdate, select=Cases_Cumulative)
current_ratio <- (week2/week1)[,1]
#current_ratio[week1<10] <- NA
#negative ratios (data corrections) are treated as missing
current_ratio[current_ratio<0] <- NA
#categorize the ratio into fixed growth bands for mapping/reporting
tsa_current_ratio_dat <- data.frame(tsa=subset(cumcases.TSA, Date==latestdate, select=Level_Name)[,1],
current_ratio=current_ratio, current_ratio_cat = cut(current_ratio, breaks=c(0,0.5,.9,1.1,1.5,2,4,8,max(current_ratio))))
#week-over-week case growth ratio per county (same construction as the TSA block)
cumcases.county <- county.cumulative %>% dplyr::select(Date,Level_Name, Cases_Cumulative)
cumcases.county$Date <- ymd(cumcases.county$Date)
latestdate <- max(cumcases.county$Date, na.rm=T)
earliestdate <- latestdate - 14
middate <- latestdate-7
#NOTE(review): subtraction assumes identical county order within each date subset
week2 <- subset(cumcases.county, Date==latestdate, select=Cases_Cumulative) -
subset(cumcases.county, Date==middate, select=Cases_Cumulative)
week1 <- subset(cumcases.county, Date==middate, select=Cases_Cumulative) -
subset(cumcases.county, Date==earliestdate, select=Cases_Cumulative)
current_ratio <- (week2/week1)[,1]
#suppress ratios built on fewer than 10 prior-week cases (too noisy at county level)
current_ratio[week1<10] <- NA
#negative ratios (data corrections) are treated as missing
current_ratio[current_ratio<0] <- NA
#categorize the ratio into fixed growth bands for mapping/reporting
county_current_ratio_dat <- data.frame(county=subset(cumcases.county, Date==latestdate, select=Level_Name)[,1], current_ratio=current_ratio, current_ratio_cat = cut(current_ratio, breaks=c(0,0.5,.9,1.1,1.5,2,4,8,max(current_ratio))))
#week-over-week case growth ratio per metro area (same construction as the TSA
#block; the county-level small-count filter is not applied here)
cumcases.metro <- metro.cumulative %>% dplyr::select(Date, Level_Name, Cases_Cumulative)
cumcases.metro$Date <- ymd(cumcases.metro$Date)
latestdate <- max(cumcases.metro$Date, na.rm=T)
earliestdate <- latestdate - 14
middate <- latestdate-7
#NOTE(review): subtraction assumes identical metro order within each date subset
week2 <- subset(cumcases.metro, Date==latestdate, select=Cases_Cumulative) -
subset(cumcases.metro, Date==middate, select=Cases_Cumulative)
week1 <- subset(cumcases.metro, Date==middate, select=Cases_Cumulative) -
subset(cumcases.metro, Date==earliestdate, select=Cases_Cumulative)
current_ratio <- (week2/week1)[,1]
#negative ratios (data corrections) are treated as missing
current_ratio[current_ratio<0] <- NA
#categorize the ratio into fixed growth bands for mapping/reporting
metro_current_ratio_dat <- data.frame(metro=subset(cumcases.metro, Date==latestdate, select=Level_Name)[,1], current_ratio=current_ratio, current_ratio_cat = cut(current_ratio, breaks=c(0,0.5,.9,1.1,1.5,2,4,8,max(current_ratio))))
#week-over-week case growth ratio for the whole state (single series per date)
cumcases.state <- state.cumulative %>% dplyr::select(Date, Cases_Cumulative)
cumcases.state$Date <- ymd(cumcases.state$Date)
latestdate <- max(cumcases.state$Date, na.rm=T)
earliestdate <- latestdate - 14
middate <- latestdate-7
week2 <- subset(cumcases.state, Date==latestdate, select=Cases_Cumulative) -
subset(cumcases.state, Date==middate, select=Cases_Cumulative)
week1 <- subset(cumcases.state, Date==middate, select=Cases_Cumulative) -
subset(cumcases.state, Date==earliestdate, select=Cases_Cumulative)
current_ratio <- (week2/week1)[,1]
#negative ratios (data corrections) are treated as missing
current_ratio[current_ratio<0] <- NA
state_current_ratio_dat <- data.frame(current_ratio=current_ratio, current_ratio_cat = cut(current_ratio, breaks=c(0,0.5,.9,1.1,1.5,2,4,8,max(current_ratio))))
#export case-ratio results into .csv files
write.csv(state_current_ratio_dat, 'statistical-output/standard-stats/case-ratios/state_case_ratio.csv', row.names = F)
write.csv(tsa_current_ratio_dat,'statistical-output/standard-stats/case-ratios/tsa_case_ratio.csv', row.names = F)
write.csv(county_current_ratio_dat, 'statistical-output/standard-stats/case-ratios/county_case_ratio.csv', row.names = F)
write.csv(metro_current_ratio_dat,'statistical-output/standard-stats/case-ratios/metro_case_ratio.csv', row.names = F)
#append 7-day centered rolling means (zoo::rollmean, NA-filled at the edges) of
#cumulative and daily cases to each combined dataset, rewriting the source
#files in place
#NOTE(review): rollmean assumes rows are date-sorted within each group - confirm
tsa<-read.csv("combined-datasets/tsa.csv")
tsa2 = tsa %>% group_by(TSA) %>% mutate(Cases_Cumulative_Mean_7Day = rollmean(Cases_Cumulative, 7, fill = NA),
Cases_Daily_Mean_7Day = rollmean(Cases_Daily, 7, fill = NA))
write.csv(tsa2, 'combined-datasets/tsa.csv', row.names = F)
county<-read.csv("combined-datasets/county.csv")
county2 = county %>% group_by(County) %>% mutate(Cases_Cumulative_Mean_7Day = rollmean(Cases_Cumulative, 7, fill = NA),
Cases_Daily_Mean_7Day = rollmean(Cases_Daily, 7, fill = NA))
write.csv(county2, 'combined-datasets/county.csv', row.names = F)
metro <-read.csv("combined-datasets/metro.csv")
metro2 = metro %>% group_by(Metro_Area) %>% mutate(Cases_Cumulative_Mean_7Day = rollmean(Cases_Cumulative, 7, fill = NA),
Cases_Daily_Mean_7Day = rollmean(Cases_Daily, 7, fill = NA))
write.csv(metro2, 'combined-datasets/metro.csv', row.names = F)
#state data lives in an Excel workbook; a single series, so no grouping needed
state<-xlsx::read.xlsx('combined-datasets/state.xlsx', sheetIndex=1)
state2 = state %>% mutate(Cases_Cumulative_Mean_7Day = rollmean(Cases_Cumulative, 7, fill = NA),
Cases_Daily_Mean_7Day = rollmean(Cases_Daily, 7, fill = NA))
xlsx::write.xlsx(state2, file="combined-datasets/state.xlsx", sheetName="longitudinal", row.names=FALSE)